; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnux32 | FileCheck %s
; RUN: not llc < %s -mtriple=i386-unknown-linux-gnu 2>&1 | FileCheck %s --check-prefix=ERROR

; For 32-bit targets llc still reports an error; this is expected because __int128 isn't supported in the frontend there.
; ERROR: error: couldn't allocate output register for constraint 'x'

; Round-trips a 128-bit integer through inline asm whose output and input both
; use the 'x' register constraint (xmm registers in the expected output below).
; %1 supplies bits 127..64 and %0 supplies bits 63..0 of the asm operand; the
; asm result is split back into its low and high 64-bit halves for the return.
define { i64, i64 } @foo(i64 %0, i64 %1) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %xmm0
; CHECK-NEXT:    movq %rdi, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    movq %xmm0, %rdx
; CHECK-NEXT:    retq
  ; Assemble the i128 asm operand: %1 in the upper half, %0 in the lower half.
  %hi.wide = zext i64 %1 to i128
  %hi.shifted = shl nuw i128 %hi.wide, 64
  %lo.wide = zext i64 %0 to i128
  %packed = or i128 %hi.shifted, %lo.wide
  ; $0 is the i128 result and $1 is the i128 input; each is constrained to 'x'.
  %asm.out = tail call i128 asm sideeffect "movdqa  $1, $0", "=x,x,~{dirflag},~{fpsr},~{flags}"(i128 %packed)
  ; Split the asm result into its low and high 64-bit halves.
  %out.lo = trunc i128 %asm.out to i64
  %out.hi.wide = lshr i128 %asm.out, 64
  %out.hi = trunc i128 %out.hi.wide to i64
  ; Return the halves as { low, high }.
  %ret.lo = insertvalue { i64, i64 } undef, i64 %out.lo, 0
  %ret = insertvalue { i64, i64 } %ret.lo, i64 %out.hi, 1
  ret { i64, i64 } %ret
}
